1 Setup

2 Environment

Set the working dir

setwd("/mnt/picea/projects/arabidopsis/mschmid/porcupine-RNA-Seq")

Libs

suppressPackageStartupMessages(library(DESeq2))
suppressPackageStartupMessages(library(gplots))
suppressPackageStartupMessages(library(Glimma))
suppressPackageStartupMessages(library(LSD))
suppressPackageStartupMessages(library(Mfuzz))
suppressPackageStartupMessages(library(org.At.tair.db))
suppressPackageStartupMessages(library(pander))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(VennDiagram))

Helper files

suppressMessages(source("~/Git/UPSCb/src/R/densityPlot.R"))
suppressMessages(source("~/Git/UPSCb/src/R/plotMA.R"))
suppressMessages(source("~/Git/UPSCb/src/R/volcanoPlot.R"))
suppressMessages(source("~/Git/UPSCb/src/R/plot.multidensity.R"))

Load saved data Read the sample information

samples <- read.csv("~/Git/UPSCb/projects/arabidopsis-porcupine-RNA-Seq/doc/RNA-seq_pcp_vs_Col-0_info.csv")

And extend it

samples$Genotype=sub(" .*","",samples$Description)
samples$Temp=ifelse(sub(".* ","",samples$Description)=="16",
                    ifelse(nchar(as.character(samples$Description))>16,"WC","C"),
                    ifelse(nchar(as.character(samples$Description))>16,"CW","W"))

Gene raw expression

cg <- read.csv("analysis/kallisto/combined-raw-unormalised-gene-expression_data.csv",
               row.names=1)

Transcript raw expression

ct <- read.csv("analysis/kallisto/combined-raw-unormalised-transcript-expression_data.csv",
               row.names=1)

Gene normalised expression

vst.g <- read.csv("analysis/kallisto/combined-library-size-normalized_variance-stabilized_gene-expression_data.csv",
               row.names=1)

Transcript raw expression

vst.t <- read.csv("analysis/kallisto/combined-library-size-normalized_variance-stabilized_transcript-expression_data.csv",
               row.names=1)

Setup graphics

pal=brewer.pal(8,"Dark2")
mar <- par("mar")

This is pcp

pcp <- "AT2G18740"

3 Process

Re-order the samples

stopifnot(all(colnames(cg) == colnames(vst.g)))
stopifnot(all(colnames(ct) == colnames(vst.t)))
stopifnot(all(colnames(ct) == colnames(cg)))
colnames(ct) <- colnames(cg) <- colnames(vst.t) <- colnames(vst.g) <- sub("X","",colnames(cg))
samples <- samples[match(colnames(ct),samples$SampleID),]

3.1 Gather the annotation

df.annot <- data.frame(GeneID=rownames(cg),
                 SYMBOL=mapIds(org.At.tair.db,keys=rownames(cg),"SYMBOL","TAIR"),
                 GENENAME=mapIds(org.At.tair.db,keys=rownames(cg),"GENENAME","TAIR"))
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
tx.annot <- df.annot[match(sub("\\.[0-9]+","",rownames(vst.t)),df.annot$GeneID),]
row.names(tx.annot) <- rownames(vst.t)
tx.annot$TranscriptID <- rownames(vst.t)

3.2 pcp expression

3.2.1 gene expression

ord <- order(samples$Genotype,samples$Temp)
plotmatrix(as.matrix(vst.g[pcp,ord]),main="pcp gene expression",
           ylab="vst expression",xlabels = paste(samples$Genotype,
                                                 samples$Temp,sep="-")[ord],
           las=2,col=pal)

Same as a dotchart

dat <- split(unlist(vst.g[pcp,]),paste(samples$Genotype,
                                       samples$Temp,sep="-"))

In the dotcharts, the coloured dots represent the mean expression of the given condition

dotchart(do.call("cbind",dat),
         gdata=sapply(dat,mean),pch = 1,
         labels = "",gpch = 19,gcolor = pal,
         xlab="vst expression",
         main="pcp gene expression")

3.2.2 transcript expression

plotmatrix(as.matrix(vst.t[grep(pcp,rownames(vst.t)),ord]),main="pcp gene expression",
           ylab="vst expression",xlabels = paste(samples$Genotype,
                                                 samples$Temp,sep="-")[ord],
           las=2,col=pal)

In the dotcharts, the coloured dots represent the mean expression of the given condition

dev.null <- sapply(grep(pcp,rownames(vst.t)),function(pos){
    dat <- split(unlist(vst.t[pos,]),
                            paste(samples$Genotype,
                                  samples$Temp,sep="-"))
    dotchart(do.call("cbind",dat),
             gdata=sapply(dat,mean),pch = 1,
             labels = "",gpch = 19,gcolor = pal,
             xlab="vst expression",
             main=paste("pcp",rownames(vst.t)[pos],"expression"))
})

3.3 DE at the gene level

dds.g <- DESeqDataSetFromMatrix(
  countData = cg,
  colData = data.frame(genotype=samples$Genotype,
                       age=samples$Age.in.days,
                       temp=samples$Temp,
                       finalTemp=factor(substr(samples$Temp,
                                               nchar(samples$Temp),
                                               nchar(samples$Temp)))),
  design = ~genotype * finalTemp)

Differential Expression

suppressMessages(dds.g <- DESeq(dds.g))

Dispersion Estimation

The dispersion estimation is adequate

plotDispEsts(dds.g)

3.3.1 pcp vs col0

res <- results(dds.g,name="genotype_pcp_vs_Col.0")

assume a 1% FDR

alpha=0.01

Plot the Median vs Average

There are many genes that appear differentially expressed at a 1% FDR cutoff The log2 fold-change range is relatively broad, with three extreme values

plotMA(res,alpha)

Plot the log odds vs. log2 fold change

The volcano plot shows the same results as the MA plot; a large number of genes show significant fold-changes

volcanoPlot(res,alpha=alpha)

Plot the adjusted p-value histogram

Which is almost evenly distributed, with an enrichment for lower p-values (more significant)

hist(res$padj,breaks=seq(0,1,.01))

Select genes below alpha

Note the 0.5 cutoff on the log fold change is motivated by the Schurch et al. RNA, 2016 publication

sel <- res$padj<alpha & !is.na(res$padj) & abs(res$log2FoldChange) >= 0.5
sprintf("There are %s genes differentially expressed at a %s cutoff",
        sum(sel),alpha)
## [1] "There are 1440 genes differentially expressed at a 0.01 cutoff"

Equally distributed between Wt (-1) and pcp (1)

pander(table(sign(res[sel,"log2FoldChange"])))
-1 1
734 706

Write them out

dir.create("analysis/DESeq2/Kallisto-gene/genotype-effect",
           showWarnings=FALSE,recursive=TRUE)
write.csv(df.annot[sel,],
      file=file.path("analysis/DESeq2/Kallisto-gene/genotype-effect",
                     "pcp-vs-wt_one-percent-FDR-cutoff_significant-genes.csv"))

write.csv(df.annot[sel,][sign(res$log2FoldChange[sel])==1,],
      file=file.path("analysis/DESeq2/Kallisto-gene/genotype-effect",
                     "pcp-up-regulated_one-percent-FDR-cutoff_significant-genes.txt"))

write.csv(df.annot[sign(res$log2FoldChange[sel])==-1,],
      file=file.path("analysis/DESeq2/Kallisto-gene/genotype-effect",
                     "wt-up-regulated_one-percent-FDR-cutoff_significant-genes.txt"))

write.csv(cbind(as.data.frame(res),df.annot),
          file=file.path("analysis/DESeq2/Kallisto-gene/genotype-effect",
                         "pcp_vs_wt.csv"))

pcp.vs.wt <- rownames(res)[sel]

3.3.1.1 Cluster the VST expression of the genotype genes

The color code for the column next to the row dendogram (genes) is yellow for significant positive fold change and darkorange for significant negative fold change

heatmap.2(as.matrix(vst.g[rownames(vst.g) %in% pcp.vs.wt,]),
          scale="row",labRow=NA,trace="none",
          RowSideColors=ifelse(sign(res[sel,"log2FoldChange"])==-1,
                               "darkorange",
                               "yellow"),
          labCol=paste(colData(dds.g)$genotype,colData(dds.g)$temp,sep="-"))

3.3.1.2 Interactive MDS

res.df <- as.data.frame(res)
res.df$log10MeanNormCount <- log10(res.df$baseMean)
idx <- rowSums(counts(dds.g)) > 0
res.df <- res.df[idx,]
res.df$padj[is.na(res.df$padj)] <- 1
glMDPlot(res.df,
         xval = "log10MeanNormCount",
         yval = "log2FoldChange",
         counts = counts(dds.g)[idx,],
         anno = df.annot[idx,],
         groups = dds.g$genotype,
         samples = paste(samples$Genotype,samples$Temp,samples$Replicate,sep="-"),
         status = sel[idx],
         display.columns = c("GeneID", "SYMBOL","GENENAME"),
         path = "analysis/DESeq2/Kallisto-gene/genotype-effect/",
         folder = "report",launch=FALSE)

3.3.2 Final temperature W vs. C

res <- results(dds.g,name="finalTemp_W_vs_C")

assume a 1% FDR

alpha=0.01

There are many genes that appear differentially expressed. The log2 fold-change range is relatively broad, with two extreme positive values

plotMA(res,alpha)

Plot the log odds vs. log2 fold change

The volcano plot shows the same results as the MA plot; a large number of genes show significant fold-changes

volcanoPlot(res,alpha=alpha)

Plot the adjusted p-value histogram

Which is almost evenly distributed, with an enrichment for lower p-values (most significant)

hist(res$padj,breaks=seq(0,1,.01),xlab="FDR")

Select genes below alpha

Note the 0.5 cutoff on the log fold change is motivated by the Schurch et al. RNA, 2016 publication

sel <- res$padj<alpha & !is.na(res$padj) & abs(res$log2FoldChange) >= 0.5
sprintf("There are %s genes differentially expressed at a %s cutoff",
        sum(sel),alpha)
## [1] "There are 1903 genes differentially expressed at a 0.01 cutoff"

Slightly skewed towards over-expression in cold (-1) than in warm (1)

pander(table(sign(res[sel,"log2FoldChange"])))
-1 1
1066 837

Write them out

dir.create("analysis/DESeq2/Kallisto-gene/temperature-effect",showWarnings=FALSE)
write.csv(df.annot[sel,],
      file=file.path("analysis/DESeq2/Kallisto-gene/temperature-effect",
                     "Warm-vs-Cold-final-temperature_one-percent-FDR-cutoff_significant-genes.txt"))

write.csv(df.annot[sel,][sign(res$log2FoldChange[sel])==1,],
      file=file.path("analysis/DESeq2/Kallisto-gene/temperature-effect",
                     "Warm-final-temperature-up-regulated_one-percent-FDR-cutoff_significant-genes.txt"))

write.csv(df.annot[sel,][sign(res$log2FoldChange[sel])==-1,],
      file=file.path("analysis/DESeq2/Kallisto-gene/temperature-effect",
                     "Cold-final-temperature-up-regulated_one-percent-FDR-cutoff_significant-genes.txt"))

write.csv(cbind(as.data.frame(res),df.annot),
          file=file.path("analysis/DESeq2/Kallisto-gene/temperature-effect",
                         "Warm-vs-Cold-final-temperature.csv"))

warm.vs.cold <- rownames(res)[sel]

3.3.2.1 Cluster the VST expression of the temperat genes

The color code for the column next to the row dendogram (genes) is yellow for significant positive fold change and dark orange for significant negative fold change

heatmap.2(as.matrix(vst.g[rownames(vst.g) %in% warm.vs.cold,]),
          scale="row",labRow=NA,trace="none",
          RowSideColors=ifelse(sign(res[sel,"log2FoldChange"])==-1,
                               "darkorange",
                               "yellow"),
          labCol=paste(colData(dds.g)$genotype,colData(dds.g)$temp,sep="-"))

3.3.2.2 Interactive MDS

res.df <- as.data.frame(res)
res.df$log10MeanNormCount <- log10(res.df$baseMean)
idx <- rowSums(counts(dds.g)) > 0
res.df <- res.df[idx,]
res.df$padj[is.na(res.df$padj)] <- 1
glMDPlot(res.df,
         xval = "log10MeanNormCount",
         yval = "log2FoldChange",
         counts = counts(dds.g)[idx,],
         anno = df.annot[idx,],
         groups = dds.g$genotype,
         samples = paste(samples$Genotype,samples$Temp,samples$Replicate,sep="-"),
         status = sel[idx],
         display.columns = c("GeneID", "SYMBOL","GENENAME"),
         path = "analysis/DESeq2/Kallisto-gene/temperature-effect",
         folder = "report",launch=FALSE)

3.3.3 Interaction term between pcp and final temp

res <- results(dds.g,name="genotypepcp.finalTempW")

assume a 1% FDR

alpha=0.01

There are not many genes that appear differentially expressed. The log2 fold-change range is relatively broad, with one extreme value.

plotMA(res,alpha)

Plot the log odds vs. log2 fold change

The volcano plot shows the same results as the MA plot; a large number of genes show large insignificant fold-changes in term of the interaction of pcp and final temperature

volcanoPlot(res,alpha=alpha)

Plot the adjusted p-value histogram

Which is almost evenly distributed, with an enrichment for higher p-values (less significant)

hist(res$padj,breaks=seq(0,1,.01),xlab="FDR")

Select genes below alpha

Note the 0.5 cutoff on the log fold change is motivated by the Schurch et al. RNA, 2016 publication

sel <- res$padj<alpha & !is.na(res$padj) & abs(res$log2FoldChange) >= 0.5
sprintf("There are %s genes differentially expressed at a %s cutoff",
        sum(sel),alpha)
## [1] "There are 319 genes differentially expressed at a 0.01 cutoff"

Almost evenly distributed

pander(table(sign(res[sel,"log2FoldChange"])))
-1 1
161 158

Write them out

dir.create("analysis/DESeq2/Kallisto-gene/interaction-term",
           showWarnings = FALSE)
write.csv(df.annot[sel,],
      file=file.path("analysis/DESeq2/Kallisto-gene/interaction-term",
                     "pcp-final-temperature-interaction_one-percent-FDR-cutoff_significant-genes.txt"))

write.csv(df.annot[sel,][sign(res$log2FoldChange[sel])==1,],
      file=file.path("analysis/DESeq2/Kallisto-gene/interaction-term",
                     "pcp-final-temperature-interaction-up-regulated_one-percent-FDR-cutoff_significant-genes.txt"))

write.csv(df.annot[sel,][sign(res$log2FoldChange[sel])==-1,],
      file=file.path("analysis/DESeq2/Kallisto-gene/interaction-term",
                     "pcp-final-temperature-interaction-down-regulated_one-percent-FDR-cutoff_significant-genes.txt"))

write.csv(cbind(as.data.frame(res),df.annot),
          file=file.path("analysis/DESeq2/Kallisto-gene/interaction-term",
                         "pcp-final-temperature-interaction.csv"))

pcp.temperature <- rownames(res)[sel]

3.3.3.1 Cluster the VST expression of the interaction genes

The color code for the column next to the row dendogram (genes) is yellow for significant positive fold change and dark orange for significant negative fold change

heatmap.2(as.matrix(vst.g[rownames(vst.g) %in% pcp.temperature,]),
          scale="row",labRow=NA,trace="none",
          RowSideColors=ifelse(sign(res[sel,"log2FoldChange"])==-1,
                               "darkorange",
                               "yellow"),
          labCol=paste(colData(dds.g)$genotype,colData(dds.g)$temp,sep="-"))

3.3.3.2 Interactive MDS

res.df <- as.data.frame(res)
res.df$log10MeanNormCount <- log10(res.df$baseMean)
idx <- rowSums(counts(dds.g)) > 0
res.df <- res.df[idx,]
res.df$padj[is.na(res.df$padj)] <- 1
glMDPlot(res.df,
         xval = "log10MeanNormCount",
         yval = "log2FoldChange",
         counts = counts(dds.g)[idx,],
         anno = df.annot[idx,],
         groups = dds.g$genotype,
         samples = paste(samples$Genotype,samples$Temp,samples$Replicate,sep="-"),
         status = sel[idx],
         display.columns = c("GeneID", "SYMBOL","GENENAME"),
         path = "analysis/DESeq2/Kallisto-gene/interaction-term",
         folder = "report",launch=FALSE)

3.3.3.3 Soft clustering (Mfuzz)

Create the eSet

eset <- ExpressionSet(as.matrix(vst.g[sel,]))

Standardise

eset.s <- standardise(eset)

Average the replicates (mean)

eset.m <- ExpressionSet(
    sapply(split.data.frame(t(exprs(eset.s)),
                            paste0(samples$Genotype,samples$Temp)),
           colMeans))

Estimate the fuzzification

m <- mestimate(eset.m)

Find the clusters (12 is based on the previous heatmap)

cl <- mfuzz(eset.m,centers=12,m=m)

Order the replicates

ord <- c(4,1:3,8,5:7)

There are a number of clusters that behave similarly

mfuzz.plot(eset.m[,ord],
           cl=cl,
           mfrow=c(1,1),
           time.labels = colnames(eset.m)[ord],
           new.window=FALSE)

write.csv(cbind(as.data.frame(res),df.annot),
          file=file.path("analysis/DESeq2/Kallisto-gene/interaction-term",
                         "pcp-final-temperature-interaction.csv"))

Writing the core genes from the clusters

dir.create("analysis/DESeq2/Kallisto-gene/interaction-term/clusters",
           showWarnings = FALSE)

Not all genes will be part of the core genes from the clusters

dev.null <- sapply(1:12,function(i,d){
  nam <- d[[i]]
  write.csv(cbind(ID=nam,df.annot[nam,]),
            file=file.path("analysis/DESeq2/Kallisto-gene/interaction-term/clusters",
                           paste("cluster",i,sep="-")))
},lapply(sapply(acore(eset.m,cl,0.5),"[[","NAME"),as.character))

3.3.4 Check the overlap of the three sets

plot.new()
grid.draw(venn.diagram(list(
  pcp.vs.wt=pcp.vs.wt,
  warm.vs.cold=warm.vs.cold,
  pcp.temperature=pcp.temperature),
  filename=NULL,
  col=pal[1:3],
  category.names=c("genotype","temperature","interaction")))

3.3.5 Compare the DE gene sets with those identified by STAR + HTSeq.

compareKallistoHTSeq <- function(kfile,hfile){
    k <- read.csv(kfile,row.names=1)
    h <- read.csv(hfile,row.names=1)
    k <- k[match(sub("\\.0$","",rownames(h)),rownames(k)),]
    heatscatter(k[,"log2FoldChange"],h[,"log2FoldChange"],main="Log2 Fold-changes comparison",
                xlab="Kallisto",ylab="HTSeq",cor=TRUE)
    abline(0,1,lty=2,col="gray")
    qqplot(k[,"log2FoldChange"],h[,"log2FoldChange"],main="QQplot",xlab="Kallisto",ylab="HTSeq")
    abline(0,1,lty=2,col="gray")
    list(spearman=suppressWarnings(cor.test(k[,"log2FoldChange"],h[,"log2FoldChange"],method="spearman")),
         wilcoxon=wilcox.test(k[,"log2FoldChange"],h[,"log2FoldChange"]),
        friedman=friedman.test(cbind(k[,"log2FoldChange"],h[,"log2FoldChange"])),
        quade=quade.test(cbind(k[,"log2FoldChange"],h[,"log2FoldChange"])))
}

3.3.5.1 The genotype effects are similar as assessed by both methods

compareKallistoHTSeq("analysis/DESeq2/Kallisto-gene/genotype-effect/pcp_vs_wt.csv",
                     "analysis/DESeq2/pcp_vs_wt.csv")

## $spearman
## 
##  Spearman's rank correlation rho
## 
## data:  k[, "log2FoldChange"] and h[, "log2FoldChange"]
## S = 5.5816e+10, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9760598 
## 
## 
## $wilcoxon
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  k[, "log2FoldChange"] and h[, "log2FoldChange"]
## W = 296760000, p-value = 0.4028
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $friedman
## 
##  Friedman rank sum test
## 
## data:  cbind(k[, "log2FoldChange"], h[, "log2FoldChange"])
## Friedman chi-squared = 6143.8, df = 1, p-value < 2.2e-16
## 
## 
## $quade
## 
##  Quade test
## 
## data:  cbind(k[, "log2FoldChange"], h[, "log2FoldChange"])
## Quade F = 4435.9, num df = 1, denom df = 27415, p-value < 2.2e-16
h.pcp.vs.wt <- sub("\\.0","",scan(what="character",
                                  file="analysis/DESeq2/pcp-vs-wt_one-percent-FDR-cutoff_significant-genes.txt"))

plot.new()
grid.draw(venn.diagram(list(
  kallisto=pcp.vs.wt,
  htseq=h.pcp.vs.wt),
  filename=NULL,
  col=pal[1:2],
  category.names=c("kallisto (pcp)","htseq (pcp)")))

3.3.5.2 The final temperature effects are similar as assessed by both methods

compareKallistoHTSeq("analysis/DESeq2/Kallisto-gene/Warm-vs-Cold-final-temperature.csv","analysis/DESeq2/Warm-vs-Cold-final-temperature.csv")

## $spearman
## 
##  Spearman's rank correlation rho
## 
## data:  k[, "log2FoldChange"] and h[, "log2FoldChange"]
## S = 5.6955e+10, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9755713 
## 
## 
## $wilcoxon
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  k[, "log2FoldChange"] and h[, "log2FoldChange"]
## W = 298540000, p-value = 0.758
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $friedman
## 
##  Friedman rank sum test
## 
## data:  cbind(k[, "log2FoldChange"], h[, "log2FoldChange"])
## Friedman chi-squared = 337.34, df = 1, p-value < 2.2e-16
## 
## 
## $quade
## 
##  Quade test
## 
## data:  cbind(k[, "log2FoldChange"], h[, "log2FoldChange"])
## Quade F = 17.175, num df = 1, denom df = 27415, p-value =
## 3.419e-05
h.warm.vs.cold <- sub("\\.0","",scan(what="character",file="analysis/DESeq2/Warm-vs-Cold-final-temperature_one-percent-FDR-cutoff_significant-genes.txt"))

plot.new()
grid.draw(venn.diagram(list(
  kallisto=warm.vs.cold,
  htseq=h.warm.vs.cold),
  filename=NULL,
  col=pal[1:2],
  category.names=c("kallisto (temp)","htseq (temp)")))

3.3.5.3 The genotype effects are similar assessed by both methods

compareKallistoHTSeq("analysis/DESeq2/Kallisto-gene/pcp-final-temperature-interaction.csv","analysis/DESeq2/pcp-final-temperature-interaction.csv")

## $spearman
## 
##  Spearman's rank correlation rho
## 
## data:  k[, "log2FoldChange"] and h[, "log2FoldChange"]
## S = 7.8345e+10, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9663966 
## 
## 
## $wilcoxon
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  k[, "log2FoldChange"] and h[, "log2FoldChange"]
## W = 299430000, p-value = 0.3791
## alternative hypothesis: true location shift is not equal to 0
## 
## 
## $friedman
## 
##  Friedman rank sum test
## 
## data:  cbind(k[, "log2FoldChange"], h[, "log2FoldChange"])
## Friedman chi-squared = 1588.7, df = 1, p-value < 2.2e-16
## 
## 
## $quade
## 
##  Quade test
## 
## data:  cbind(k[, "log2FoldChange"], h[, "log2FoldChange"])
## Quade F = 751.04, num df = 1, denom df = 27415, p-value < 2.2e-16
h.pcp.temperature <- sub("\\.0","",scan(what="character",file="analysis/DESeq2/pcp-final-temperature-interaction_one-percent-FDR-cutoff_significant-genes.txt"))

plot.new()
grid.draw(venn.diagram(list(
  kallisto=pcp.temperature,
  htseq=h.pcp.temperature),
  filename=NULL,
  col=pal[1:2],
  category.names=c("kallisto (temp)","htseq (temp)")))

3.3.6 Conclusion

The results of the gene DE analysis are very similar to those obtained from STAR + HTSeq. The difference in using Kallisto probably arises from it rescuing multi-mapping reads.

3.4 At the transcript level

dds.t <- DESeqDataSetFromMatrix(
  countData = ct,
  colData = data.frame(genotype=samples$Genotype,
                       age=samples$Age.in.days,
                       temp=samples$Temp,
                       finalTemp=factor(substr(samples$Temp,
                                               nchar(samples$Temp),
                                               nchar(samples$Temp)))),
  design = ~genotype * finalTemp)

Differential Expression

suppressMessages(dds.t <- DESeq(dds.t))

Dispersion Estimation

The dispersion estimation is adequate

plotDispEsts(dds.t)

3.4.1 pcp vs col0

res <- results(dds.t,name="genotype_pcp_vs_Col.0")

assume a 1% FDR

alpha=0.01

Plot the Median vs Average There are many genes that appear differentially expressed at a 1% FDR cutoff The log2 fold-change range is relatively broad, with three extreme values

plotMA(res,alpha)

Plot the log odds vs. log2 fold change

The volcano plot shows the same results as the MA plot; a large number of genes show significant fold-changes

volcanoPlot(res,alpha=alpha)

Plot the adjusted p-value histogram

Which is almost evenly distributed, with an enrichment for lower p-values (more significant)

hist(res$padj,breaks=seq(0,1,.01))

Select genes below alpha

Note the 0.5 cutoff on the log fold change is motivated by the Schurch et al. RNA, 2016 publication

sel <- res$padj<alpha & !is.na(res$padj) & abs(res$log2FoldChange) >= 0.5
sprintf("There are %s transcripts differentially expressed at a %s cutoff",
        sum(sel),alpha)
## [1] "There are 1628 transcripts differentially expressed at a 0.01 cutoff"

Almost equally distributed between Wt (-1) and pcp (1)

pander(table(sign(res[sel,"log2FoldChange"])))
-1 1
857 771

Write them out

dir.create("analysis/DESeq2/Kallisto-transcript/genotype-effect",
           showWarnings=FALSE,recursive=TRUE)
write.csv(cbind(txID=rownames(res)[sel],tx.annot[sel,]),
      file=file.path("analysis/DESeq2/Kallisto-transcript/genotype-effect",
                     "pcp-vs-wt_one-percent-FDR-cutoff_significant-transcripts.txt"))


write.csv(cbind(txID=rownames(res)[sel & sign(res$log2FoldChange)==1],
                tx.annot[sel & sign(res$log2FoldChange)==1,]),
      file=file.path("analysis/DESeq2/Kallisto-transcript/genotype-effect",
                     "pcp-up-regulated_one-percent-FDR-cutoff_significant-transcripts.txt"))

write.csv(cbind(txID=rownames(res)[sel & sign(res$log2FoldChange) == -1],
                tx.annot[sel & sign(res$log2FoldChange) == -1,]),
      file=file.path("analysis/DESeq2/Kallisto-transcript/genotype-effect",
                     "wt-up-regulated_one-percent-FDR-cutoff_significant-transcripts.txt"))

write.csv(cbind(as.data.frame(res),tx.annot),
          file=file.path("analysis/DESeq2/Kallisto-transcript/genotype-effect",
                         "pcp_vs_wt.csv"))

t.pcp.vs.wt <- rownames(res)[sel]

3.4.1.1 Cluster the VST expression of the genotype genes

The color code for the column next to the row dendogram (genes) is yellow for significant positive fold change and darkorange for significant negative fold change

heatmap.2(as.matrix(vst.t[rownames(vst.t) %in% t.pcp.vs.wt,]),
          scale="row",labRow=NA,trace="none",
          RowSideColors=ifelse(sign(res[sel,"log2FoldChange"])==-1,
                               "darkorange",
                               "yellow"),
          labCol=paste(colData(dds.t)$genotype,colData(dds.t)$temp,sep="-"))

3.4.1.2 Interactive MDS

res.df <- as.data.frame(res)
res.df$log10MeanNormCount <- log10(res.df$baseMean)
idx <- rowSums(counts(dds.t)) > 0
res.df <- res.df[idx,]
res.df$padj[is.na(res.df$padj)] <- 1
glMDPlot(res.df,
         xval = "log10MeanNormCount",
         yval = "log2FoldChange",
         counts = counts(dds.t)[idx,],
         anno = tx.annot[idx,],
         groups = dds.t$genotype,
         samples = paste(samples$Genotype,samples$Temp,samples$Replicate,sep="-"),
         status = sel[idx],
         display.columns = c("TranscriptID", "SYMBOL","GENENAME"),
         id.column="TranscriptID",
         path = "analysis/DESeq2/Kallisto-transcript/genotype-effect",
         folder = "report",launch=FALSE)

3.4.2 Final temperature W vs. C

res <- results(dds.t,name="finalTemp_W_vs_C")

assume a 1% FDR

alpha=0.01

There are many genes that appear differentially expressed. The log2 fold-change range is relatively broad, with two extreme positive values

plotMA(res,alpha)

Plot the log odds vs. log2 fold change

The volcano plot shows the same results as the MA plot; a large number of genes show significant fold-changes

volcanoPlot(res,alpha=alpha)

Plot the adjusted p-value histogram

Which is almost evenly distributed, with an enrichment for lower p-values (most significant)

hist(res$padj,breaks=seq(0,1,.01),xlab="FDR")

Select genes below alpha

Note the 0.5 cutoff on the log fold change is motivated by the Schurch et al. RNA, 2016 publication

sel <- res$padj<alpha & !is.na(res$padj) & abs(res$log2FoldChange) >= 0.5
sprintf("There are %s transcripts differentially expressed at a %s cutoff",
        sum(sel),alpha)
## [1] "There are 1960 transcripts differentially expressed at a 0.01 cutoff"

Slightly skewed towards over-expression in cold (-1) than in warm (1)

pander(table(sign(res[sel,"log2FoldChange"])))
-1 1
1099 861

Write them out

dir.create("analysis/DESeq2/Kallisto-transcript/temperature-effect",
           showWarnings = FALSE, recursive=TRUE)
write.csv(cbind(txID=rownames(res)[sel],tx.annot[sel,]),
      file=file.path("analysis/DESeq2/Kallisto-transcript/temperature-effect",
                     "Warm-vs-Cold-final-temperature_one-percent-FDR-cutoff_significant-transcripts.txt"))

write.csv(cbind(txID=rownames(res)[sel & sign(res$log2FoldChange)==1],
            tx.annot[sel & sign(res$log2FoldChange)==1,]),
      file=file.path("analysis/DESeq2/Kallisto-transcript/temperature-effect",
                     "Warm-final-temperature-up-regulated_one-percent-FDR-cutoff_significant-transcripts.txt"))

write.csv(cbind(txID=rownames(res)[sel & sign(res$log2FoldChange)==-1],
                tx.annot[sel & sign(res$log2FoldChange) == -1,]),
      file=file.path("analysis/DESeq2/Kallisto-transcript/temperature-effect",
                     "Cold-final-temperature-up-regulated_one-percent-FDR-cutoff_significant-transcripts.txt"))

write.csv(cbind(as.data.frame(res),tx.annot),
          file=file.path("analysis/DESeq2/Kallisto-transcript/temperature-effect",
                         "Warm-vs-Cold-final-temperature.csv"))

t.warm.vs.cold <- rownames(res)[sel]

3.4.2.1 Cluster the VST expression of the temperature genes

The color code for the column next to the row dendogram (genes) is yellow for significant positive fold change and dark orange for significant negative fold change

heatmap.2(as.matrix(vst.t[rownames(vst.t) %in% t.warm.vs.cold,]),
          scale="row",labRow=NA,trace="none",
          RowSideColors=ifelse(sign(res[sel,"log2FoldChange"])==-1,
                               "darkorange",
                               "yellow"),
          labCol=paste(colData(dds.t)$genotype,colData(dds.t)$temp,sep="-"))

3.4.2.2 Interactive MDS

res.df <- as.data.frame(res)
res.df$log10MeanNormCount <- log10(res.df$baseMean)
idx <- rowSums(counts(dds.t)) > 0
res.df <- res.df[idx,]
res.df$padj[is.na(res.df$padj)] <- 1
glMDPlot(res.df,
         xval = "log10MeanNormCount",
         yval = "log2FoldChange",
         counts = counts(dds.t)[idx,],
         anno = tx.annot[idx,],
         groups = dds.t$genotype,
         samples = paste(samples$Genotype,samples$Temp,samples$Replicate,sep="-"),
         status = sel[idx],
         display.columns = c("TranscriptID", "SYMBOL","GENENAME"),
         id.column="TranscriptID",
         path = "analysis/DESeq2/Kallisto-transcript/temperature-effect",
         folder = "report",launch=FALSE)

3.4.3 Interaction term between pcp and final temp

res <- results(dds.t,name="genotypepcp.finalTempW")

assume a 1% FDR

alpha=0.01

There are not many genes that appear differentially expressed. The log2 fold-change range is relatively broad, with one extreme value.

plotMA(res,alpha)

Plot the log odds vs. log2 fold change

The volcano plot shows the same results as the MA plot; a large number of genes show large insignificant fold-changes in term of the interaction of pcp and final temperature

volcanoPlot(res,alpha=alpha)

Plot the adjusted p-value histogram

Which is almost evenly distributed, with an enrichment for higher p-values (less significant)

hist(res$padj,breaks=seq(0,1,.01),xlab="FDR")

Select genes below alpha

Note the 0.5 cutoff on the log fold change is motivated by the Schurch et al. RNA, 2016 publication

sel <- res$padj<alpha & !is.na(res$padj) & abs(res$log2FoldChange) >= 0.5
sprintf("There are %s transcripts differentially expressed at a %s cutoff",
        sum(sel),alpha)
## [1] "There are 285 transcripts differentially expressed at a 0.01 cutoff"

Evenly distributed

pander(table(sign(res[sel,"log2FoldChange"])))
-1 1
143 142

Write them out

dir.create("analysis/DESeq2/Kallisto-transcript/interaction-term",
           showWarnings = FALSE, recursive = TRUE)
write.csv(cbind(txID=rownames(res)[sel],tx.annot[sel,]),
      file=file.path("analysis/DESeq2/Kallisto-transcript/interaction-term",
                     "pcp-final-temperature-interaction_one-percent-FDR-cutoff_significant-transcripts.txt"))

write.csv(cbind(txID=rownames(res)[sel & sign(res$log2FoldChange)==1],
                tx.annot[sel & sign(res$log2FoldChange)==1,]),
      file=file.path("analysis/DESeq2/Kallisto-transcript/interaction-term",
                     "pcp-final-temperature-interaction-up-regulated_one-percent-FDR-cutoff_significant-transcripts.txt"))

write.csv(cbind(txID=rownames(res)[sel & sign(res$log2FoldChange)==-1],
                tx.annot[sel & sign(res$log2FoldChange) == -1,]),
      file=file.path("analysis/DESeq2/Kallisto-transcript/interaction-term",
                     "pcp-final-temperature-interaction-down-regulated_one-percent-FDR-cutoff_significant-transcripts.txt"))

write.csv(cbind(as.data.frame(res),tx.annot),
          file=file.path("analysis/DESeq2/Kallisto-transcript/interaction-term",
                         "pcp-final-temperature-interaction.csv"))

t.pcp.temperature <- rownames(res)[sel]

3.4.3.1 Cluster the VST expression of the interaction genes

The color code for the column next to the row dendogram (genes) is yellow for significant positive fold change and dark orange for significant negative fold change

heatmap.2(as.matrix(vst.t[rownames(vst.t) %in% t.pcp.temperature,]),
          scale="row",labRow=NA,trace="none",
          RowSideColors=ifelse(sign(res[sel,"log2FoldChange"])==-1,
                               "darkorange",
                               "yellow"),
          labCol=paste(colData(dds.t)$genotype,colData(dds.t)$temp,sep="-"))

3.4.3.2 Interactive MDS

res.df <- as.data.frame(res)
res.df$log10MeanNormCount <- log10(res.df$baseMean)
idx <- rowSums(counts(dds.t)) > 0
res.df <- res.df[idx,]
res.df$padj[is.na(res.df$padj)] <- 1
glMDPlot(res.df,
         xval = "log10MeanNormCount",
         yval = "log2FoldChange",
         counts = counts(dds.t)[idx,],
         anno = tx.annot[idx,],
         groups = dds.t$genotype,
         samples = paste(samples$Genotype,samples$Temp,samples$Replicate,sep="-"),
         status = sel[idx],
         display.columns = c("TranscriptID", "SYMBOL","GENENAME"),
         id.column="TranscriptID",
         path = "analysis/DESeq2/Kallisto-transcript/interaction-term",
         folder = "report",launch=FALSE)

3.4.3.3 Soft clustering (Mfuzz)

Create the eSet

eset <- ExpressionSet(as.matrix(vst.t[sel,]))

Standardise

eset.s <- standardise(eset)

Average the replicates (mean)

eset.m <- ExpressionSet(
    sapply(split.data.frame(t(exprs(eset.s)),
                            paste0(samples$Genotype,samples$Temp)),
           colMeans))

Estimate the fuzzification

m <- mestimate(eset.m)

Find the clusters (12 is based on the previous heatmap)

cl <- mfuzz(eset.m,centers=12,m=m)

Order the replicates

ord <- c(4,1:3,8,5:7)

There are a number of clusters that behave similarly

mfuzz.plot(eset.m[,ord],
           cl=cl,
           mfrow=c(1,1),
           time.labels = colnames(eset.m)[ord],
           new.window=FALSE)

Writing the core transcripts from the clusters

dir.create("analysis/DESeq2/Kallisto-transcript/interaction-term/clusters",
           showWarnings = FALSE)

Not all genes will be part of the core genes from the clusters

dev.null <- sapply(1:12,function(i,d){
  nam <- d[[i]]
  write.csv(cbind(ID=nam,tx.annot[nam,]),
            file=file.path("analysis/DESeq2/Kallisto-transcript/interaction-term/clusters",
                           paste("cluster",i,sep="-")))
},lapply(sapply(acore(eset.m,cl,0.5),"[[","NAME"),as.character))

3.4.4 Check the overlap of the three sets

plot.new()
grid.draw(venn.diagram(list(
  pcp.vs.wt=t.pcp.vs.wt,
  warm.vs.cold=t.warm.vs.cold,
  pcp.temperature=t.pcp.temperature),
  filename=NULL,
  col=pal[1:3],
  category.names=c("pcp.vs.wt","warm.vs.cold","pcp.by.temperature")))

3.4.5 Conclusion

The results are highly similar to those obtained at the gene level

3.5 Transcript differential usage

3.5.1 Gene and transcript expression distribution

plot.multidensity(lapply(1:ncol(vst.g),function(i){vst.g[,i]}),
                  col=pal[as.integer(colData(dds.g)$genotype)],
                  xlab="gene expression"
)
abline(v=1,col="grey",lty=3)

plot.multidensity(lapply(1:ncol(vst.t),function(i){vst.t[,i]}),
  col=pal[as.integer(colData(dds.t)$genotype)],
  xlab="transcript expression"
)
abline(v=1,col="grey",lty=3)

How many genes have alternative transcripts reported?

tx2gene <- read.table("/mnt/picea/storage/reference/Arabidopsis-thaliana/TAIR10/kallisto/tx2gene.txt",
                      header=TRUE)
tx2gene <- tx2gene[,2:1]

Is that not surprisingly little?

barplot(table(table(tx2gene$GENEID)))

Select the genes with isoforms

gwi <- names(table(tx2gene$GENEID)[table(tx2gene$GENEID) > 1])
message(paste("There are",length(gwi),"genes with splice isoforms"))
## There are 5885 genes with splice isoforms

Plotting function IMPORTANT: To be plotted, genes need to have an expression > 1 in at least 3 samples. Moreover, some samples will be removed from the plot if they have no expression

plotTx <- function(tpt,tpg,dendro,tp,e.cutoff=1,s.cutoff=3){
  #options(warn=2)
  col.sel <- match(dendro,colnames(vst.g))

  # plot all gene transcripts where the transcript is only found DE
  dev.null <- sapply(tpt[tpt %in% gwi][! tpt[tpt %in% gwi] %in% tpg],function(nam){
    #heatmap.2(as.matrix(rbind(vst.t[grep(nam,rownames(vst.t)),],
    #                          vst.g[nam,])),cexRow=0.6)

    #message(nam)
    row.sel <- grepl(nam,rownames(vst.t))
    mat <- as.matrix(rbind(
      vst.g[nam,col.sel],
      vst.t[row.sel,col.sel]))

    de.sel <- rownames(vst.t)[row.sel] %in% tp[grep(nam,tp)]

    if(any(rowSums(mat[-1,][de.sel,,drop=FALSE] > e.cutoff)>s.cutoff)){

      plotmatrix(mat,
        col=c(1,pal[1:sum(row.sel)]),
        lty=c(2,2 - (de.sel)),
        xlabels=paste(samples$Genotype,samples$Temp)[col.sel],las=2,
        main=paste(nam,"transcripts expression"))

      smat <- scale(mat)
      # if a sample has consistently no expression
      col.skip <- is.na(colSums(smat))

      plotmatrix(smat[,!col.skip],
        col=c(1,pal[1:sum(row.sel)]),
        lty=c(2,2 - (rownames(vst.t)[row.sel] %in% tp[grep(nam,tp)])),
        xlabels=paste(samples$Genotype,samples$Temp)[col.sel][!col.skip],las=2,
        main=paste(nam,"z-score"))

      return(1);
    }
    return(0);
  })
  return(sum(dev.null))
}

plotG <- function(tpt,tpg,dendro,tp,e.cutoff=1,s.cutoff=3){
  #options(warn=2)
  col.sel <- match(dendro,colnames(vst.g))

  # plot all gene transcripts where the transcript is only found DE
  dev.null <- sapply(tpg[tpg %in% gwi][! tpg[tpg %in% gwi] %in% tpt],function(nam){
    #heatmap.2(as.matrix(rbind(vst.t[grep(nam,rownames(vst.t)),],
    #                          vst.g[nam,])),cexRow=0.6)

    #message(nam)
    row.sel <- grepl(nam,rownames(vst.t))
    mat <- as.matrix(rbind(
      vst.g[nam,col.sel],
      vst.t[row.sel,col.sel]))

    de.sel <- rownames(vst.t)[row.sel] %in% tp[grep(nam,tp)]

    #if(any(rowSums(mat[-1,][de.sel,,drop=FALSE] > e.cutoff)>s.cutoff)){

      plotmatrix(mat,
                 col=c(1,pal[1:sum(row.sel)]),
                 lty=c(1,2 - (de.sel)),
                 xlabels=paste(samples$Genotype,samples$Temp)[col.sel],las=2,
                 main=paste(nam,"transcripts expression"))

      smat <- scale(mat)
      # if a sample has consistently no expression
      col.skip <- is.na(colSums(smat))

      plotmatrix(smat[,!col.skip],
                 col=c(1,pal[1:sum(row.sel)]),
                 lty=c(1,2 - (rownames(vst.t)[row.sel] %in% tp[grep(nam,tp)])),
                 xlabels=paste(samples$Genotype,samples$Temp)[col.sel][!col.skip],las=2,
                 main=paste(nam,"z-score"))

    #  return(1);
    #}
    #return(0);
  })
  #return(sum(dev.null))
}

3.5.2 pcp vs. Col.0

tpt <- unique(sub("\\.[0-9]+$","",t.pcp.vs.wt))

plot.new()
grid.draw(venn.diagram(list(
  gene=pcp.vs.wt[pcp.vs.wt %in% gwi],
  transcript=tpt[tpt %in% gwi]),
  filename=NULL,
  col=pal[1:2],
  category.names=c("gene","transcript")))

Order the samples as in the heatmap

dendro <- as.character(c(2,1,3,
                         37,39,38,
                         25,26,27,
                         13,14,15,
                         30,28,29,
                         18,17,16,
                         42,40,41,
                         5,6,4))
pdf("analysis/DESeq2/Kallisto-transcript/non-de-gene-transcript-abundance_by-genotype.pdf",height=12,width=8)
par(mfrow=c(3,2))
message(paste("There are",
              plotTx(tpt,pcp.vs.wt,dendro,t.pcp.vs.wt,s.cutoff=6),
              "transcripts passing the thresholds"))
## There are 227 transcripts passing the thresholds
dev.off()
## png 
##   2
pdf("analysis/DESeq2/Kallisto-transcript/de-gene-transcript-abundance_by-genotype.pdf",height=12,width=8)
par(mfrow=c(3,2))
dev.null <- plotG(tpt,pcp.vs.wt,dendro,t.pcp.vs.wt)
dev.off()
## png 
##   2

3.5.3 Final temperature Warm vs. Cold

tpt <- unique(sub("\\.[0-9]+$","",t.warm.vs.cold))

plot.new()
grid.draw(venn.diagram(list(
  gene=warm.vs.cold[warm.vs.cold %in% gwi],
  transcript=tpt[tpt %in% gwi]),
  filename=NULL,
  col=pal[1:2],
  category.names=c("gene","transcript")))

Order the samples as in the heatmap

dendro <- as.character(c(13,14,15,
                         25,26,27,
                         30,28,29,
                         18,17,16,
                         42,40,41,
                         5,6,4,
                         2,1,3,
                         37,39,38))

pdf("analysis/DESeq2/Kallisto-transcript/non-de-gene-transcript-abundance_by-temperature.pdf",height=12,width=8)
par(mfrow=c(3,2))
message(paste("There are",
              plotTx(tpt,warm.vs.cold,dendro,t.warm.vs.cold,s.cutoff=6),
              "transcripts passing the thresholds"))
## There are 144 transcripts passing the thresholds
dev.off()
## png 
##   2
pdf("analysis/DESeq2/Kallisto-transcript/de-gene-transcript-abundance_by-temperature.pdf",height=12,width=8)
par(mfrow=c(3,2))
dev.null <- plotG(tpt,warm.vs.cold,dendro,t.warm.vs.cold)
dev.off()
## png 
##   2

3.5.4 Interaction term between pcp and final temp

tpt <- unique(sub("\\.[0-9]+$","",t.pcp.temperature))

plot.new()
grid.draw(venn.diagram(list(
  gene=pcp.temperature[pcp.temperature %in% gwi],
  transcript=tpt[tpt %in% gwi]),
  filename=NULL,
  col=pal[1:2],
  category.names=c("gene","transcript")))

Order the samples as in the heatmap

dendro <- as.character(c(15,13,14,
                         25,27,26,
                         18,17,16,
                         30,29,28,
                         3,2,1,
                         4,5,6,
                         42,41,40,
                         37,38,39))

pdf("analysis/DESeq2/Kallisto-transcript/non-de-gene-transcript-abundance_by-interaction.pdf",height=12,width=8)
par(mfrow=c(3,2))
message(paste("There are", plotTx(tpt,pcp.temperature,dendro,t.pcp.temperature),"transcripts passing the thresholds"))
## There are 27 transcripts passing the thresholds
dev.off()
## png 
##   2
pdf("analysis/DESeq2/Kallisto-transcript/de-gene-transcript-abundance_by-interaction.pdf",height=12,width=8)
par(mfrow=c(3,2))
dev.null <- plotG(tpt,pcp.temperature,dendro,t.pcp.temperature)
dev.off()
## png 
##   2

4 Session Info

## R version 3.3.2 (2016-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.1 LTS
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] VennDiagram_1.6.17         futile.logger_1.4.3       
##  [3] RColorBrewer_1.1-2         pander_0.6.0              
##  [5] org.At.tair.db_3.4.0       AnnotationDbi_1.36.0      
##  [7] Mfuzz_2.34.0               e1071_1.6-7               
##  [9] LSD_3.0                    Glimma_1.2.1              
## [11] gplots_3.0.1               DESeq2_1.14.1             
## [13] SummarizedExperiment_1.4.0 Biobase_2.34.0            
## [15] GenomicRanges_1.26.1       GenomeInfoDb_1.10.1       
## [17] IRanges_2.8.1              S4Vectors_0.12.1          
## [19] BiocGenerics_0.20.0       
## 
## loaded via a namespace (and not attached):
##  [1] edgeR_3.16.5         splines_3.3.2        gtools_3.5.0        
##  [4] Formula_1.2-1        assertthat_0.1       latticeExtra_0.6-28 
##  [7] yaml_2.1.14          RSQLite_1.1-1        backports_1.0.4     
## [10] lattice_0.20-34      limma_3.30.7         digest_0.6.10       
## [13] XVector_0.14.0       colorspace_1.3-2     htmltools_0.3.5     
## [16] Matrix_1.2-7.1       plyr_1.8.4           XML_3.98-1.5        
## [19] genefilter_1.56.0    zlibbioc_1.20.0      xtable_1.8-2        
## [22] scales_0.4.1         gdata_2.17.0         BiocParallel_1.8.1  
## [25] htmlTable_1.7        tibble_1.2           openssl_0.9.5       
## [28] annotate_1.52.0      ggplot2_2.2.0        nnet_7.3-12         
## [31] lazyeval_0.2.0       survival_2.40-1      magrittr_1.5        
## [34] memoise_1.0.0        evaluate_0.10        foreign_0.8-67      
## [37] class_7.3-14         tools_3.3.2          data.table_1.10.0   
## [40] stringr_1.1.0        munsell_0.4.3        locfit_1.5-9.1      
## [43] cluster_2.0.5        lambda.r_1.1.9       base64_2.0          
## [46] caTools_1.17.1       RCurl_1.95-4.8       bitops_1.0-6        
## [49] rmarkdown_1.2        gtable_0.2.0         DBI_0.5-1           
## [52] gridExtra_2.2.1      knitr_1.15.1         Hmisc_4.0-1         
## [55] rprojroot_1.1        futile.options_1.0.0 KernSmooth_2.23-15  
## [58] stringi_1.1.2        Rcpp_0.12.8          geneplotter_1.52.0  
## [61] rpart_4.1-10         acepack_1.4.1